library(simona)
# Build the ontology DAG with simona's GO.db-based constructor. `org_db` names
# the organism annotation package handed to it (here the human one).
dag = create_ontology_DAG_from_GO_db(org_db = "org.Hs.eg.db")
# Auto-print the object to show its summary.
dag
## An ontology_DAG object:
## Source: GO BP / GO.db package
## 27942 terms / 55956 relations
## Root: GO:0008150
## Terms: GO:0000001, GO:0000002, GO:0000003, GO:0000011, ...
## Max depth: 18
## Avg number of parents: 2.00
## Aspect ratio: 363.92:1 (based on the longest distance to root)
## 782.78:1 (based on the shortest distance to root)
## Relations: is_a, part_of
## Annotations are available.
##
## With the following columns in the metadata data frame:
## id, name, definition
Let’s make a comparison of various similarity methods, using the GO BP ontology.
# Fix the RNG state so the sampled GO terms are reproducible.
set.seed(123)

# Annotation-based information content; only terms with a non-missing IC are
# candidates for sampling.
ic = term_IC(dag, method = "IC_annotation")
ic = ic[!is.na(ic)]
go_id = sample(names(ic), size = 500)

# One term-by-term similarity matrix per method reported by
# all_term_sim_methods(), with the list named by method.
lt = setNames(
    lapply(all_term_sim_methods(), function(sim_method) {
        term_sim(dag, go_id, sim_method)
    }),
    all_term_sim_methods()
)

# Keep the entries below the diagonal of each matrix, one column per method.
df = as.data.frame(lapply(lt, function(sim_mat) sim_mat[lower.tri(sim_mat)]))

# LCA depth for the same sampled terms, flattened the same way so it lines up
# row by row with `df`.
LCA_depth = local({
    depth_mat = LCA_depth(dag, go_id)
    depth_mat[lower.tri(depth_mat)]
})
The following heatmap shows the pairwise correlations between the semantic similarities computed by the different methods.
library(ComplexHeatmap)

# Pearson correlation between every pair of similarity methods; each
# coefficient is computed from the term pairs where both methods have a value.
cor = stats::cor(df, use = "pairwise.complete.obs", method = "pearson")
Heatmap(
    matrix = cor,
    name = "correlation",
    column_title = "Pearson correlation"
)
# Methods left out of the focused comparison. `ind` holds their column
# positions in `df`; `cor` was computed from `df`, so the same positions index
# its rows and columns.
ind = which(colnames(df) %in% c("Sim_Jiang_1997",
    "Sim_Dice", "Sim_Kappa", "Sim_Jaccard", "Sim_Overlap",
    "Sim_AIC_2014", "Sim_universal", "Sim_HRSS_2013"))

# Select what to keep by positive index. Negative indexing with the result of
# which() selects nothing when no name matches (x[-integer(0)] is empty), which
# would silently empty both objects instead of leaving them untouched.
keep = setdiff(seq_len(ncol(df)), ind)

# drop = FALSE keeps the matrix / data frame shape even if one column remains.
cor2 = cor[keep, keep, drop = FALSE]
df2 = df[, keep, drop = FALSE]
Heatmap(cor2, name = "correlation", column_title = "Pearson correlation")
# Manual assignment of the retained similarity methods to four groups.
# Each element of `members` lists the methods of one group; the group label is
# the element's position (stored as a double, named by method).
group = local({
    members = list(
        c("Sim_Pekar_2002", "Sim_Stojanovic_2001", "Sim_WP_1994",
          "Sim_Shenoy_2012", "Sim_Li_2003", "Sim_Wang_edge_2012"),
        c("Sim_SSDD_2013", "Sim_RSS_2013", "Sim_Zhong_2002",
          "Sim_Slimani_2006"),
        c("Sim_Shen_2010", "Sim_Zhang_2006", "Sim_EISI_2015",
          "Sim_XGraSM_2013", "Sim_Lin_1998", "Sim_Resnik_1999",
          "Sim_FaITH_2010", "Sim_Relevance_2006", "Sim_SimIC_2010"),
        c("Sim_Wang_2007", "Sim_Ancestor", "Sim_GOGO_2018",
          "Sim_AlMubaid_2006", "Sim_Rada_1989", "Sim_Leocock_1998",
          "Sim_Resnik_edge_2005")
    )
    labels = rep(as.numeric(seq_along(members)), times = lengths(members))
    setNames(labels, unlist(members))
})
library(ggrepel)
library(ggplot2)

# Classical multidimensional scaling on the correlation-derived dissimilarity
# (1 - correlation), giving one 2D coordinate per similarity method.
loc = as.data.frame(cmdscale(as.dist(1 - cor2)))
names(loc) = c("x", "y")

# Row names carry the method names; attach them and the manual group labels.
loc$method = rownames(loc)
loc$group = group[loc$method]

# Scatter plot of the methods, coloured by group, with non-overlapping labels.
ggplot(loc, aes(x = x, y = y, label = method, colour = factor(group))) +
    geom_point() +
    geom_text_repel(show.legend = FALSE) +
    labs(
        title = "MDS based on the correlation between similarities",
        x = "Dimension 1",
        y = "Dimension 2"
    )
Select an individual similarity heatmap by: Use order from Sim_Lin_1998
Prev method: Sim_Ancestor Curr method: Sim_Lin_1998 Next method: Sim_Resnik_1999
# Draw 5000 random term pairs (rows of `df2`) to keep the scatter-plot matrix
# readable.
ind = sample.int(nrow(df2), size = 5000)

# Pairwise scatter plots of the grouped methods, in group order; point colour
# encodes the LCA depth of each term pair (+1 so depth 0 is not colour 0).
pairs(
    x = df2[ind, names(group)],
    col = LCA_depth[ind] + 1,
    pch = ".",
    gap = 0
)
# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] C/UTF-8/C/C/C/C
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggrepel_0.9.4 ggplot2_3.4.4 ComplexHeatmap_2.16.0
## [4] simona_0.99.10 knitr_1.44 rmarkdown_2.25
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 farver_2.1.1 dplyr_1.1.3
## [4] blob_1.2.4 Biostrings_2.68.1 bitops_1.0-7
## [7] fastmap_1.1.1 RCurl_1.98-1.12 digest_0.6.33
## [10] lifecycle_1.0.3 cluster_2.1.4 KEGGREST_1.40.1
## [13] RSQLite_2.3.1 magrittr_2.0.3 compiler_4.3.1
## [16] rlang_1.1.1 sass_0.4.7 tools_4.3.1
## [19] igraph_1.5.1 utf8_1.2.3 yaml_2.3.7
## [22] labeling_0.4.3 bit_4.0.5 scatterplot3d_0.3-44
## [25] xml2_1.3.5 RColorBrewer_1.1-3 withr_2.5.1
## [28] BiocGenerics_0.46.0 stats4_4.3.1 fansi_1.0.5
## [31] colorspace_2.1-0 GO.db_3.17.0 scales_1.2.1
## [34] iterators_1.0.14 cli_3.6.1 crayon_1.5.2
## [37] generics_0.1.3 ragg_1.2.6 RcppParallel_5.1.7
## [40] httr_1.4.7 rjson_0.2.21 DBI_1.1.3
## [43] cachem_1.0.8 zlibbioc_1.46.0 parallel_4.3.1
## [46] AnnotationDbi_1.62.2 XVector_0.40.0 proxyC_0.3.3
## [49] matrixStats_1.0.0 vctrs_0.6.4 Matrix_1.6-1.1
## [52] jsonlite_1.8.7 IRanges_2.34.1 GetoptLong_1.0.5
## [55] S4Vectors_0.38.2 bit64_4.0.5 clue_0.3-65
## [58] systemfonts_1.0.5 magick_2.8.0 foreach_1.5.2
## [61] jquerylib_0.1.4 glue_1.6.2 codetools_0.2-19
## [64] Polychrome_1.5.1 shape_1.4.6 gtable_0.3.4
## [67] GenomeInfoDb_1.36.4 munsell_0.5.0 tibble_3.2.1
## [70] pillar_1.9.0 htmltools_0.5.6.1 GenomeInfoDbData_1.2.10
## [73] circlize_0.4.15 R6_2.5.1 textshaping_0.3.7
## [76] doParallel_1.0.17 evaluate_0.22 Biobase_2.60.0
## [79] lattice_0.21-9 png_0.1-8 memoise_2.0.1
## [82] bslib_0.5.1 Rcpp_1.0.11 org.Hs.eg.db_3.17.0
## [85] xfun_0.40 pkgconfig_2.0.3 GlobalOptions_0.1.2